Importando bibliotecas que serão utilizadas
library(xts)
library(dplyr)
library(purrr)
library(stats)
library(plotly)
library(janitor)
library(ggplot2)
library(seasonal)
library(forecast)
library(lubridate)
source("multiplot.R")
Carregando a base do Spotify
# Download the TidyTuesday (2020-01-21) Spotify songs CSV and read it into
# `data`, the table every later step of the script works from.
data <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-21/spotify_songs.csv"
)
Plotando popularidade de cada um dos gêneros presentes na base
# Mean track popularity for each playlist genre; the genre column is
# converted to a factor after summarising.
popularity_genre <- data %>%
  dplyr::group_by(playlist_genre) %>%
  dplyr::summarise(popularity = mean(track_popularity)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(playlist_genre = as.factor(playlist_genre))

# One black-outlined bar per genre, filled by genre. stat = "identity" makes
# the bar height the pre-computed mean instead of a row count.
p <- ggplot(
  data = popularity_genre,
  mapping = aes(x = playlist_genre, y = popularity, fill = playlist_genre)
) +
  geom_bar(stat = "identity", colour = "black") +
  labs(title = "Popularidade dos gêneros", x = "Gênero", y = "Popularidade")

# Convert the static ggplot into a plotly widget.
ggplotly(p)
Apresentando tempo médio de cada gênero
# Mean track duration for each playlist genre. duration_ms is divided by 1000
# to go from milliseconds to seconds; the genre column is converted to a
# factor after summarising.
duration_genre <- data %>%
  dplyr::group_by(playlist_genre) %>%
  dplyr::summarise(duration = mean(duration_ms) / 1000) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(playlist_genre = as.factor(playlist_genre))

# One black-outlined bar per genre, filled by genre. stat = "identity" makes
# the bar height the pre-computed mean instead of a row count.
p <- ggplot(
  data = duration_genre,
  mapping = aes(x = playlist_genre, y = duration, fill = playlist_genre)
) +
  geom_bar(stat = "identity", colour = "black") +
  labs(title = "Duração média dos gêneros", x = "Gênero", y = "Duração (s)")

# Convert the static ggplot into a plotly widget.
ggplotly(p)
Correlacionando algumas variáveis da base
#' Convert a numeric series into a chain-linked index with base 100
#'
#' The first element is set to 100 and every following element is the
#' previous index value scaled by the ratio between consecutive inputs:
#' `y[i] = (x[i] / x[i - 1]) * y[i - 1]`.
#'
#' @param x A numeric vector.
#' @return A vector the same length as `x` holding the index values. An empty
#'   input is returned unchanged and a single value yields `100`.
calc_indice <- function(x) {
  # Nothing to index; also avoids growing an empty vector to length one.
  if (length(x) == 0) {
    return(x)
  }

  y <- x
  y[1] <- 100

  # seq_along(x)[-1] is empty for a length-one input, whereas 2:length(x)
  # would count backwards (2, 1) and index x[0].
  # The step-by-step form is kept on purpose: a non-finite ratio (NA, or a
  # division by zero) carries forward into every later value.
  for (i in seq_along(x)[-1]) {
    y[i] <- (x[i] / x[i - 1]) * y[i - 1]
  }

  y
}
# Per-genre means of the audio features and of track popularity, one row per
# playlist genre. The genre label is then dropped and every remaining column
# is rewritten by calc_indice() as an index relative to its first row.
#
# `duration` is computed once: a second `duration = mean(duration)` inside the
# same summarise() would only average the single value computed just above.
#
# NOTE(review): calc_indice() divides by each column's first value. If that
# value is negative (presumably the case for loudness, if it is stored in dB)
# the whole column changes sign, which inverts the sign of its correlations
# below -- confirm that indexing before cor() is intended.
data_correl <- data %>%
  dplyr::group_by(playlist_genre) %>%
  dplyr::summarise(
    duration = mean(duration_ms) / 1000,
    danceability = mean(danceability),
    energy = mean(energy),
    loudness = mean(loudness),
    speechiness = mean(speechiness),
    instrumentalness = mean(instrumentalness),
    popularity = mean(track_popularity)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::select(-playlist_genre) %>%
  purrr::map_df(calc_indice)

# Pairwise correlation matrix between the indexed columns.
correl <- stats::cor(data_correl)
##                     duration danceability      energy   loudness speechiness
## duration          1.00000000  -0.80893028 -0.07591468  0.6495955 -0.49575879
## danceability     -0.80893028   1.00000000 -0.33725448 -0.2993752  0.71645160
## energy           -0.07591468  -0.33725448  1.00000000 -0.7522404 -0.52282062
## loudness          0.64959548  -0.29937515 -0.75224044  1.0000000  0.17337482
## speechiness      -0.49575879   0.71645160 -0.52282062  0.1733748  1.00000000
## instrumentalness -0.19666198  -0.01581998  0.76882627 -0.7275554 -0.11602262
## popularity       -0.30723462   0.18557390 -0.39306778  0.1771410  0.04669578
##                  instrumentalness  popularity
## duration              -0.19666198 -0.30723462
## danceability          -0.01581998  0.18557390
## energy                 0.76882627 -0.39306778
## loudness              -0.72755540  0.17714103
## speechiness           -0.11602262  0.04669578
## instrumentalness       1.00000000 -0.77745609
## popularity            -0.77745609  1.00000000
Evolução da popularidade ao longo dos anos de cada gênero
# Mean track popularity for every (album release year, playlist genre) pair.
# NOTE(review): by default as.Date() only parses complete dates; release dates
# stored as a bare year or year-month (if the data has any) would end up with
# an NA year -- confirm against the dataset.
data_evolution_popularity <- data %>%
  dplyr::mutate(
    release_date = as.Date(track_album_release_date),
    year = lubridate::year(release_date)
  ) %>%
  dplyr::group_by(year, playlist_genre) %>%
  # Only the grouping columns and `popularity` are kept by summarise(), so the
  # helper release_date column does not reach the result.
  dplyr::summarise(popularity = mean(track_popularity)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(playlist_genre = as.factor(playlist_genre))